// Shader-style type names mapped onto the OpenCL float vector types.
#define vec2 float2
#define vec3 float3
#define vec4 float4
// Colour-named swizzles: `.rgb` / `.rgba` expand to `.xyz` / `.xyzw`.
// These are plain token replacements, so any identifier spelled `rgb` or `rgba` is rewritten as well.
#define rgb xyz
#define rgba xyzw

// Program-scope sampler used by INPUT(): normalized coordinates, clamp-to-edge addressing, linear filtering.
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

/*
 * Sample src_data at a coordinate remapped through param->origROI.
 *
 * The incoming coordinate is scaled by origROI[2..3] and shifted by
 * origROI[0..1] before the image is read with the file-level sampler.
 *
 * src_data : image to read from
 * tc       : coordinate to remap (the sampler uses normalized coordinates)
 * param    : filter parameters; only origROI is read here
 * returns  : the texel returned by read_imagef at the remapped coordinate
 */
vec4 INPUT(image2d_t src_data, vec2 tc, __global FilterParam* param)
{
	const vec2 roiOffset = (vec2)(param->origROI[0], param->origROI[1]);
	const vec2 roiScale = (vec2)(param->origROI[2], param->origROI[3]);
	const vec2 mapped = tc * roiScale + roiOffset;

	return read_imagef(src_data, sampler, mapped);
}

// Single-precision approximation of pi; MAIN uses it to build the sine frequency.
#define FILTER_PI 3.1415926f

/*
 * Wave-distortion kernel.
 *
 * For every work-item, the vertically flipped row index is displaced by a sine
 * of the column position (phase shifted by the normalized playback time), the
 * source is sampled at the displaced position with a two-pixel feather at the
 * borders, and the result is blended over the undistorted source pixel.
 * Displaced rows that fall outside the frame contribute black.
 *
 * src_data  : source image
 * dest_data : destination image
 * param     : filter parameters (cur_time, total_time here; origROI via INPUT)
 * swing     : amplitude factor of the vertical displacement
 * frequency : frequency factor of the sine along x
 * alpha     : blend amount in percent, [0-100]; values outside the range are clamped
 *
 * The output keeps the alpha channel of the undistorted source pixel.
 */
__kernel void MAIN(
      __read_only image2d_t src_data,
      __write_only image2d_t dest_data,        //Data in global memory
      __global FilterParam* param,
	  float swing,
	  float frequency, 
	  int alpha)  //[0-100]		// the gpu items/threads should be newW*newH
{
	// NOTE(review): FilterParam is declared elsewhere; if cur_time and total_time
	// are integer fields this is an integer division - confirm.
	float time = param->cur_time / param->total_time;

	// The global work size is used as the frame resolution.
	int W = get_global_size(0);
	int H = get_global_size(1);
	vec2 iResolution = (vec2)(W, H);

	// Work-item position from the project helpers, fetched once and reused.
	int px = get_global_id0(param);
	int py = get_global_id1(param);

	// Normalized pixel-centre coordinate for the undistorted sample.
	vec2 tc = ((vec2)(px, py) + (vec2)(0.5f)) / iResolution;

	// Same position with the row index flipped vertically.
	int2 gl_FragCoord = (int2)(px, H - py - 1);

	// Two pixels expressed in normalized units: width of the border feather.
	vec2 ttwo = 2.0f / iResolution;
	float fre = frequency / 0.2f * FILTER_PI / iResolution.x;

	// Displaced row, in pixels.
	float y = gl_FragCoord.y - swing * 100.0f * iResolution.y / 360.0f * sin(fre * (gl_FragCoord.x + time));

	vec4 col;
	if (y < 0.0f || y >= iResolution.y)
	{
		// Displaced row lies outside the frame.
		col = (vec4)(0.0f, 0.0f, 0.0f, 1.0f);
	}
	else
	{
		// NOTE(review): the y term adds the normalized row to the displaced pixel row
		// before dividing by the height; kept exactly as in the original - confirm intent.
		vec2 uv = (vec2)(gl_FragCoord.x / iResolution.x, (gl_FragCoord.y / iResolution.y + y) / iResolution.y);

		// Fade to zero within two pixels of each image border.
		float featherX = smoothstep(0.0f, ttwo.x, uv.x) * (1.0f - smoothstep(1.0f - ttwo.x, 1.0f, uv.x));
		float featherY = smoothstep(0.0f, ttwo.y, uv.y) * (1.0f - smoothstep(1.0f - ttwo.y, 1.0f, uv.y));
		col = INPUT(src_data, uv, param) * featherX * featherY;
	}

	vec4 inBGRA = INPUT(src_data, (vec2)(tc.x, 1.0f - tc.y), param);

	// Keep the blend weights inside [0, 1] even if the caller passes an
	// out-of-range percentage; in-range values are unchanged.
	float mixPercent = alpha < 0 ? 0.0f : (alpha > 100 ? 100.0f : (float)alpha);
	vec4 outputCol = inBGRA * (1.0f - mixPercent / 100.0f) + col * mixPercent / 100.0f;

	// NOTE(review): the write position uses get_global_id() and the source image
	// height, while the rest of the kernel uses get_global_id0/1(param) and the
	// global work size - confirm these always agree.
	write_imagef(dest_data, (int2)(get_global_id(0), get_image_height(src_data) - get_global_id(1) - 1), (vec4)(outputCol.xyz, inBGRA.w));
}
